library(tidycensus) # connect with the census bureau API
library(tidyverse) # data wrangling
library(mapview) # spatial data visualizer
library(plotly) # graph / chart visualizer
library(ggiraph) # graph / chart visualizer
library(ggplot2) # graph / chart visualizer
library(DT) # fancy tables
library(scales) # scales map data to aesthetics
library(sf) # spatial data package
# Pull county-level ACS estimates for Oregon with get_acs().
# DP02_0066P is the variable used here for the percent of the population
# holding a graduate degree; the request is pinned to the 2021 vintage.
or_grads <- get_acs(
  state = "OR",
  geography = "county",
  variables = "DP02_0066P",
  year = 2021
)
# Build a preview table: split NAME into county and state columns, keep the
# estimate and its margin of error, and rank counties from highest to lowest.
or_grads_table <- or_grads %>%
  separate(NAME, into = c("county", "state"), sep = ", ") %>%
  select(county, state, estimate, moe) %>%
  arrange(desc(estimate))

# Render the table as an interactive widget (datatable() comes from DT).
datatable(or_grads_table)
Figure 1 above shows the data imported with the get_acs() function. The three counties with the highest percentage of graduate-degree holders are Benton, Multnomah, and Washington; the three with the lowest are Malheur, Gilliam, and Morrow.
# Dot plot of the graduate-degree estimate for each county, with counties
# ordered along the y axis by their estimate.
or_plot <- or_grads_table %>%
  ggplot(aes(x = estimate, y = reorder(county, estimate))) +
  geom_point(color = "navy", size = 3) +
  # Show x values as percentages and drop the " County" suffix from y labels.
  scale_x_continuous(labels = function(value) paste0(value, "%")) +
  scale_y_discrete(labels = function(name) str_remove(name, " County")) +
  labs(
    title = "% Population with graduate degrees, 2017-2021 ACS",
    subtitle = "Counties in Oregon",
    caption = "Data acquired with R and tidycensus",
    x = "ACS estimate Percentage",
    y = ""
  ) +
  theme_minimal(base_size = 12)

or_plot # print plot
Figure 2 above plots the counties by estimate value
in descending order. We get a full picture of our results without
having to page through multiple pages of a table.
# Dot plot of the county estimates with horizontal error bars spanning
# estimate - moe to estimate + moe. It is built from or_grads directly, so the
# y-axis labels are cleaned from the full NAME column.
or_plot_errorbar <- ggplot(
  or_grads,
  aes(x = estimate, y = reorder(NAME, estimate)) # order counties by estimate
) +
  geom_errorbar(
    aes(xmin = estimate - moe, xmax = estimate + moe),
    width = 0.5, linewidth = 0.5
  ) +
  geom_point(color = "gold", size = 3) + # gold points drawn over the bars
  scale_x_continuous(labels = function(x) paste0(x, "%")) + # append "%"
  scale_y_discrete(
    # strip the " County, Oregon" / ", Oregon" suffix from the axis labels
    labels = function(x) str_remove(x, " County, Oregon|, Oregon")
  ) +
  labs(
    title = "% Population with graduate degrees, 2017-2021 ACS",
    subtitle = "Counties in Oregon",
    caption = "Data acquired with R and tidycensus. Error bars represent margin of error around estimates.",
    # labs() takes name = value pairs. The x label used to be split into a
    # stray unnamed "ACS estimate" string plus x = "Percentage", so the first
    # half was not attached to any label. It is now one named x label, the
    # same text as the estimate-only dot plot.
    x = "ACS estimate Percentage",
    y = ""
  ) +
  theme_minimal(base_size = 12)

or_plot_errorbar # print plot
Figure 3 above reveals the margin of error associated
with the estimate data.
# Convert the error-bar plot into an interactive plotly widget. The tooltip is
# limited to the x aesthetic, and all four plot margins are set to 50.
or_plot_errorbar %>%
  ggplotly(tooltip = "x") %>%
  layout(margin = list(l = 50, r = 50, b = 50, t = 50))
Figure 4: Hover over the points in the plot above to see the interactive tooltip.
# Fetch the variable lookup table for the 2021 5-year ACS and browse it in an
# interactive, searchable DT table.
vars <- load_variables(year = 2021, dataset = "acs5")
datatable(vars)
## Warning in instance$preRenderHook(instance): It seems your data is too big for
## client-side DataTables. You may consider server-side processing:
## https://rstudio.github.io/DT/server.html
Figure 5: Use the search box in the datatable to explore the available variables. For example, search for “bachelor’s” to find the variable codes used in the next steps.
# Tract-level estimates for ACS variable B25013_006 in Oregon, returned with
# tract geometry so the result can be mapped.
# (Presumably owner-occupied households whose householder holds a bachelor's
# degree or higher -- verify against the variable table.)
# NOTE(review): no `year` is passed, so the survey vintage is whatever this
# tidycensus version defaults to. The variable list in this document was
# loaded for 2021; consider pinning `year` here and in the renter query
# together so both use the same vintage.
bach_owners <- get_acs(
  state = "OR",
  geography = "tract",
  variables = "B25013_006",
  geometry = TRUE
)
## Getting data from the 2017-2021 5-year ACS
## Downloading feature geometry from the Census website. To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.
##
|
| | 0%
|
|= | 1%
|
|== | 3%
|
|=== | 5%
|
|==== | 6%
|
|===== | 7%
|
|====== | 8%
|
|====== | 9%
|
|======= | 10%
|
|======== | 11%
|
|========= | 13%
|
|========== | 14%
|
|========== | 15%
|
|=========== | 16%
|
|============ | 17%
|
|============= | 18%
|
|============== | 20%
|
|=============== | 22%
|
|================ | 22%
|
|================= | 24%
|
|================== | 25%
|
|================== | 26%
|
|=================== | 27%
|
|==================== | 28%
|
|===================== | 29%
|
|====================== | 31%
|
|====================== | 32%
|
|======================= | 33%
|
|======================== | 34%
|
|========================= | 35%
|
|========================== | 37%
|
|=========================== | 39%
|
|============================ | 39%
|
|============================= | 41%
|
|============================== | 42%
|
|============================== | 43%
|
|=============================== | 45%
|
|================================ | 46%
|
|================================= | 48%
|
|================================== | 48%
|
|=================================== | 51%
|
|==================================== | 52%
|
|===================================== | 53%
|
|====================================== | 54%
|
|======================================= | 56%
|
|======================================== | 58%
|
|========================================= | 59%
|
|========================================== | 60%
|
|=========================================== | 62%
|
|============================================ | 63%
|
|============================================= | 65%
|
|============================================== | 65%
|
|=============================================== | 67%
|
|================================================ | 69%
|
|================================================== | 71%
|
|=================================================== | 72%
|
|=================================================== | 73%
|
|==================================================== | 75%
|
|====================================================== | 77%
|
|======================================================= | 78%
|
|======================================================= | 79%
|
|======================================================== | 80%
|
|========================================================= | 82%
|
|========================================================== | 82%
|
|=========================================================== | 84%
|
|============================================================ | 86%
|
|============================================================== | 88%
|
|============================================================== | 89%
|
|=============================================================== | 90%
|
|================================================================= | 93%
|
|=================================================================== | 96%
|
|===================================================================== | 98%
|
|===================================================================== | 99%
|
|======================================================================| 100%
# Tract-level estimates for ACS variable B25013_011 in Oregon. No geometry is
# requested; this table is later joined by GEOID onto the owner table, which
# already carries the tract geometry.
# (Presumably renter-occupied households whose householder holds a bachelor's
# degree or higher -- verify against the variable table.)
# NOTE(review): no `year` is passed, so the survey vintage is whatever this
# tidycensus version defaults to. Consider pinning `year` here and in the
# owner query together so both use the same vintage.
bach_renters <- get_acs(
  state = "OR",
  geography = "tract",
  variables = "B25013_011",
  geometry = FALSE
)
## Getting data from the 2017-2021 5-year ACS
# Give the owner and renter estimates distinct column names so the two tables
# can be combined without clashing.
bach_owners <- rename(
  bach_owners,
  estimate_owners = estimate,
  moe_owners = moe
)

# For renters, keep only the join key and the renamed estimate / MOE columns.
bach_renters <- bach_renters %>%
  select(GEOID, estimate_renters = estimate, moe_renters = moe)

# Attach the renter columns to each owner tract, then derive the combined
# total and the renter / owner shares of that total as percentages.
# NOTE(review): tracts where both estimates are 0 produce NaN percentages
# (0 / 0); confirm that is acceptable for the map.
bach_housing <- bach_owners %>%
  left_join(bach_renters, by = "GEOID") %>%
  select(
    GEOID, NAME,
    estimate_owners, estimate_renters,
    moe_owners, moe_renters,
    geometry
  ) %>%
  mutate(
    estimate_total = estimate_renters + estimate_owners,
    percent_renters = (estimate_renters / estimate_total) * 100,
    percent_owners = (estimate_owners / estimate_total) * 100
  )

# Interactive map of the tracts, colored by the renter percentage.
mapview(bach_housing, zcol = "percent_renters")